# Packages ----------------------------------------------------------------
library(dplyr)
library(ggplot2)

# Data --------------------------------------------------------------------
# Load the dataset. The relative path is resolved against the current working
# directory, so the script must be run from the folder holding the CSV.
# NOTE(review): the ppl_pred_* columns are presumably each respondent's
# predicted likelihood for the named candidate -- confirm against the codebook.
data <- read.csv("data_paper.csv")

# Quick visual check of the distribution of each prediction column.
hist(data$ppl_pred_trump)
hist(data$ppl_pred_biden)

# Central tendency of each column; na.rm = TRUE drops missing values first.
# The Analysis section recomputes these same two statistics per candidate.
mean(data$ppl_pred_trump, na.rm = TRUE)
mean(data$ppl_pred_biden, na.rm = TRUE)

median(data$ppl_pred_trump, na.rm = TRUE)
median(data$ppl_pred_biden, na.rm = TRUE)

# Analysis ----------------------------------------------------------------

# Palette: a named character vector mapping each candidate key to its colour.
# The names are matched against the `candidate` values by the manual fill and
# colour scales of the plot.
colors <- setNames(
  c("#0076CE", "#FF0000", "grey45", "#FFD700"),
  c("biden", "trump", "middle", "rfk")
)

# Forecasted vote share per candidate ---------------------------------------
# 1. Stack the two prediction columns into long form (one row per
#    respondent x candidate), stripping the "ppl_pred_" prefix so that
#    `candidate` holds "trump" / "biden".
# 2. Compute each candidate's central likelihood twice: average and median.
# 3. Stack those two statistics into a `stat` column ("Average" / "Median").
# 4. Within each statistic, divide every candidate's central likelihood by the
#    sum over candidates to obtain the estimated vote share.
vote_share_by_stat <- data %>%
  tidyr::pivot_longer(cols = c("ppl_pred_trump", "ppl_pred_biden"),
                      names_to = "candidate", values_to = "likelihood",
                      names_prefix = "ppl_pred_") %>%
  group_by(candidate) %>%
  summarise(likelihood_Average = mean(likelihood, na.rm = TRUE),
            likelihood_Median = median(likelihood, na.rm = TRUE)) %>%
  tidyr::pivot_longer(cols = c("likelihood_Average", "likelihood_Median"),
                      names_to = "stat", values_to = "likelihood",
                      names_prefix = "likelihood_") %>%
  group_by(stat) %>%
  mutate(estimated_vote_share = likelihood / sum(likelihood)) %>%
  # Drop the grouping so it does not leak into the plotting data.
  ungroup() %>%
  # Format the percentage label once; both text layers reuse it.
  mutate(share_label = scales::percent(estimated_vote_share, accuracy = 1))

# Bar chart with one facet per statistic. Candidates are ordered on the x axis
# by decreasing estimated vote share.
fig1_national_vote_share <-
  ggplot(vote_share_by_stat,
         aes(x = reorder(candidate, -estimated_vote_share),
             y = estimated_vote_share, fill = candidate)) +
  geom_col(color = NA, width = 0.85, alpha = 0.35) +
  # Label drawn just below the top of each bar, in the candidate's colour ...
  geom_text(aes(label = share_label,
                color = candidate, y = estimated_vote_share - 0.02),
            size = 5) +
  # ... then overdrawn with a faint black copy to darken the text.
  geom_text(aes(label = share_label,
                y = estimated_vote_share - 0.02),
            color = "black",
            alpha = 0.2,
            size = 5) +
  facet_wrap(~stat) +
  scale_fill_manual(values = colors) +
  scale_color_manual(values = colors) +
  scale_x_discrete(breaks = c("trump", "biden"),
                   labels = c("Trump", "Biden")) +
  # NOTE(review): values outside `limits` are censored by default, so a bar
  # whose share exceeds 60% would be dropped with a warning. Raise the upper
  # limit if the data ever produce a larger share.
  scale_y_continuous(name = "Forecasted National\nVote Share (%)\n",
                     labels = scales::percent_format(accuracy = 1),
                     expand = c(0, 0),
                     limits = c(0, 0.6)) +
  labs(caption = "\nThe forecasted national vote share is computed by dividing the central likelihood for a candidate by the sum of\n  the central likelihoods of all candidates.\nThe central likelihood is calculated using the average or the median.") +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_text(size = 14),
        axis.text.x = element_text(size = 13),
        axis.text.y = element_text(size = 13),
        strip.background.x = element_rect(fill = "grey90", color = NA),
        strip.text.x = element_text(size = 15),
        panel.background = element_rect(color = "grey90", fill = NA),
        panel.grid.major.y = element_blank(),
        legend.position = "none")

# Display the figure (top-level auto-print, as the unassigned pipeline did).
fig1_national_vote_share

# Save the figure, passing the plot explicitly rather than relying on
# ggplot2's implicit "last plot", which breaks if another plot is created
# between building this one and saving it.
ggsave("fig1_national_vote_share.png",
       plot = fig1_national_vote_share,
       width = 9, height = 5.5, dpi = 300)

